###############################################################################-
# Created By: Pietryka
# Creation Date:  April 16, 2019
# Updated Date: December 30, 2020
# Purpose: export models as tables
# Contact: mpietryka@fsu.edu
###############################################################################-



#  1. LOAD PACKAGES & DATA    -----------------------------

## 1A. LOAD PACKAGES    -----------------------------

library(tidyverse)   # DATA CLEANING FUNCTIONS
library(lme4)        # MULTILEVEL MODELS
library(texreg)      # DISPLAY MODELS



## 1B. LOAD DATA    -----------------------------
## models fit in "2-analysis/SC-Tables-Step1- Fit Models.R"

# MAIN-TEXT MODELS (LIST) -- EXPORTED IN SECTION 2
model_maintext_list <- read_rds(
  "../Data/Derived/model_maintext_list.rds"
)

# 3-, 5-, AND 7-GRAM MODELS (LIST) -- EXPORTED IN SECTION 3
model_ngram_list <- read_rds(
  "../Data/Derived/model_ngram_list.rds"
)

# JACCARD-SIMILARITY MODEL -- EXPORTED IN SECTION 4
fit_jaccard_all5 <- read_rds(
  "../Data/Derived/fit_jaccard_all5.rds"
)

# MODEL KEEPING STOP WORDS AND SKIPPING STEMMING -- EXPORTED IN SECTION 5
fit_full_all_nostop <- read_rds(
  "../Data/Derived/fit_full_all_nostop.rds"
)


# 2. MAKE TABLE (MAIN TEXT) -----------------------

# COLUMN HEADERS FOR THE MAIN-TEXT TABLE: "(1)", "(2)", ... ONE PER MODEL
mod_names <- paste0("(", seq_along(model_maintext_list), ")")


# CAPTION TEXT FOR THE MAIN-TEXT TABLE
# NOTE(review): `the_cap` is assigned here but the htmlreg() calls in this
# file pass caption = " " instead -- presumably the caption is added by hand
# in the exported document; confirm.
the_cap <- "Multilevel linear regressions of dyadic text similarity (with standard errors in parentheses). Observations are dyadic, consisting of a newer, focal constitution and an older, source constitution. The outcome variable is the z-score of the proportion borrowed---the proportion of the focal constitution's fivegrams that appeared first in our corpus in the source constitution."


# COEFFICIENT MAP: MODEL TERM NAME -> DISPLAY LABEL (HTML MARKUP ALLOWED)
# Passed as custom.coef.map to every htmlreg() call in this file; the order of
# the entries here is the order that `group_list` below indexes into.
var_map <- list(
  "same_state" = "Same-state dyad (0 = No; 1 = Yes)",
  "us_from" = "US is the source (0 = No; 1 = Yes)",
  "share_border" = "Share a border (0 = No; 1 = Yes)",
  "scale(distance)" = "Absolute distance (Z score)",
  "I(scale(distance)^2)" = "Absolute distance (Z score)<sup>2</sup>",
  "distance_rel" = "Relative distance  (Z score)",
  "I(distance_rel^2)" = "Relative distance (Z score)<sup>2</sup>",
  "both_south" = "Southern dyad (0 = No; 1 = Yes)",
  "year_to" = "Year focal constitution was ratified",
  "year_from" = "Year source constitution was ratified",
  "scale(time_diff)" = "Temporal difference (Z score)",
  "I(scale(time_diff)^2)" = "Temporal difference (Z score)<sup>2</sup>",
  "factor(party_same, levels = c(0, -1, 1))-1" =
    "Opposing parties  (0 = No; 1 = Yes)",
  "factor(party_same, levels = c(0, -1, 1))1" =
    "Same party  (0 = No; 1 = Yes)",
  "(Intercept)" = "Intercept"
)

# DISPLAY LABELS FOR THE GOODNESS-OF-FIT ROWS (PASSED AS custom.gof.names)
# The element names record which default row each label is meant to replace.
# NOTE(review): texreg presumably applies custom.gof.names by position, not by
# name, so this order has to match the models' GOF row order -- confirm.
gof_names <- c(
  "AIC" = "AIC",
  "Num. obs." = "N - Dyads",
  "Num. groups: from" = "N - Source constitutions",
  "Num. groups: to" = "N - Focal constitutions",
  "Var: from (Intercept)" = "&sigma;<sup>2</sup><sub>Source</sub>",
  "Var: to (Intercept)" = "&sigma;<sup>2</sup><sub>Focal</sub>",
  "Var: Residual" = "&sigma;<sup>2</sup><sub>residual</sub>"
)


# ROW GROUPS FOR THE COEFFICIENT BLOCK (PASSED AS groups)
# Indices count rows in `var_map` order: 1-2 dyad indicators, 3-8 geography,
# 9-12 time, 13-14 party congruence, 15 intercept.
group_list <- list(
  " " = 1:2,
  "Geography" = 3:8,
  "Time" = 9:12,
  "Partisan Congruence  (Reference = Mixed)" = 13:14,
  " " = 15
)



# EXPORT THE MAIN-TEXT TABLE TO "Tables/models.doc"
#
# This is the first write into "Tables/". Create the folder when it is missing
# (e.g. a fresh checkout where the empty output directory was not tracked) so
# htmlreg() does not fail when opening the output file. An existing folder is
# left untouched.
dir.create("Tables", showWarnings = FALSE, recursive = TRUE)

htmlreg(
  model_maintext_list,
  # Output: the doctype/html/head/body tags are all requested so the file is
  # a complete HTML document rather than a table fragment.
  file = "Tables/models.doc",
  doctype = TRUE,
  html.tag = TRUE,
  head.tag = TRUE,
  body.tag = TRUE,
  # Caption is a single space; `the_cap` is not used here.
  caption = " ",
  caption.above = TRUE,
  # Columns and coefficient rows.
  custom.model.names = mod_names,
  custom.coef.map = var_map,
  groups = group_list,
  single.row = FALSE,
  # Significance marking: one star and bold face at the same 0.05 threshold.
  stars = 0.05,
  bold = 0.05,
  custom.note = "* p < 0.05 (two-tailed)",
  # Goodness-of-fit rows: drop log-likelihood and BIC, relabel the remaining
  # rows, then move the first row (AIC in `gof_names`) to the bottom.
  include.loglik = FALSE,
  include.bic = FALSE,
  custom.gof.names = gof_names,
  reorder.gof = c(2:7, 1)
)




# 3. MAKE TABLE (3-, 5-, and 7-grams) -----------------------

# CAPTION TEXT FOR THE N-GRAM TABLE
# NOTE(review): overwritten here but not passed to htmlreg() below, which uses
# caption = " " -- presumably the caption is added by hand; confirm.
the_cap <- "Multilevel linear regressions of dyadic text similarity (with standard errors in parentheses). Observations are dyadic, consisting of a newer, focal constitution and an older, source constitution. The outcome variable is the z-score of the <pre>proportion borrowed</pre>---the proportion of the focal constitution's n-grams that appeared first in our corpus in the source constitution. Model 1 relies on three-grams. Model 2 relies on five-grams, as presented in the main text. Model 3 relies on seven-grams."


# COLUMN HEADERS: ONE PER N-GRAM SIZE
mod_names <- c("3-grams", "5-grams", "7-grams")


# EXPORT THE N-GRAM TABLE TO "Tables/models_357grams.doc"
# Same labels, grouping and GOF handling as the main-text table, but with
# single.row = TRUE.
htmlreg(
  model_ngram_list,
  # Output file, written as a complete HTML document.
  file = "Tables/models_357grams.doc",
  doctype = TRUE,
  html.tag = TRUE,
  head.tag = TRUE,
  body.tag = TRUE,
  # Caption is a single space; `the_cap` is not used here.
  caption = " ",
  caption.above = TRUE,
  # Columns and coefficient rows.
  custom.model.names = mod_names,
  custom.coef.map = var_map,
  groups = group_list,
  single.row = TRUE,
  # Significance marking at the 0.05 threshold.
  stars = 0.05,
  bold = 0.05,
  custom.note = "* p < 0.05 (two-tailed)",
  # Goodness-of-fit rows: no log-likelihood or BIC; first row moved last.
  include.loglik = FALSE,
  include.bic = FALSE,
  custom.gof.names = gof_names,
  reorder.gof = c(2:7, 1)
)



# 4. MAKE TABLE (JACCARD SIMILARITY) -----------------------

# CAPTION TEXT FOR THE JACCARD-SIMILARITY TABLE
# NOTE(review): overwritten here but not passed to htmlreg() below, which uses
# caption = " " -- presumably the caption is added by hand; confirm.
the_cap <- "Multilevel linear regressions of dyadic text similarity (with standard errors in parentheses). Observations are dyadic, consisting of a newer, focal constitution and an older, source constitution. The outcome variable is the z-score of the Jaccard similarity---the number of five-grams appearing in both documents, divided by the total number of five-grams in either document. For the source constitution, we include only innovative fivegrams (fivegrams appearing for the first time in our corpus). For the focal constitution, we include all five-grams."


# EXPORT THE JACCARD TABLE TO "Tables/models_jaccard.doc"
# Single model, so no custom.model.names are supplied.
# NOTE(review): unlike sections 2 and 3 there is no reorder.gof here, so the
# GOF rows keep their default order -- confirm this difference is intended.
htmlreg(
  fit_jaccard_all5,
  # Output file, written as a complete HTML document.
  file = "Tables/models_jaccard.doc",
  doctype = TRUE,
  html.tag = TRUE,
  head.tag = TRUE,
  body.tag = TRUE,
  # Caption is a single space; `the_cap` is not used here.
  caption = " ",
  caption.above = TRUE,
  # Coefficient rows.
  custom.coef.map = var_map,
  groups = group_list,
  single.row = TRUE,
  # Significance marking at the 0.05 threshold.
  stars = 0.05,
  bold = 0.05,
  custom.note = "* p < 0.05 (two-tailed)",
  # Goodness-of-fit rows: no log-likelihood or BIC; remaining rows relabelled.
  include.loglik = FALSE,
  include.bic = FALSE,
  custom.gof.names = gof_names
)



# 5. MAKE TABLE (NO STEMMING; STOP WORDS RETAINED) -----------------------

# CAPTION TEXT FOR THE NO-STEMMING / STOP-WORDS-KEPT TABLE
# NOTE(review): overwritten here but not passed to htmlreg() below, which uses
# caption = " " -- presumably the caption is added by hand; confirm.
the_cap <- "This model replicates Table 1, Model 5 from the main text, but without stemming the text or removing stop words. The estimates come from a multilevel linear regression of dyadic text similarity (with standard errors in parentheses). Observations are dyadic, consisting of a newer, focal constitution and an older, source constitution. The outcome variable is the z-score of the <pre>proportion borrowed</pre>---the proportion of the focal constitution's five-grams that appeared first in our corpus in the source constitution."


# EXPORT THE TABLE TO "Tables/models_nostop.doc"
# Single model, so no custom.model.names are supplied.
# NOTE(review): unlike sections 2 and 3 there is no reorder.gof here, so the
# GOF rows keep their default order -- confirm this difference is intended.
htmlreg(
  fit_full_all_nostop,
  # Output file, written as a complete HTML document.
  file = "Tables/models_nostop.doc",
  doctype = TRUE,
  html.tag = TRUE,
  head.tag = TRUE,
  body.tag = TRUE,
  # Caption is a single space; `the_cap` is not used here.
  caption = " ",
  caption.above = TRUE,
  # Coefficient rows.
  custom.coef.map = var_map,
  groups = group_list,
  single.row = TRUE,
  # Significance marking at the 0.05 threshold.
  stars = 0.05,
  bold = 0.05,
  custom.note = "* p < 0.05 (two-tailed)",
  # Goodness-of-fit rows: no log-likelihood or BIC; remaining rows relabelled.
  include.loglik = FALSE,
  include.bic = FALSE,
  custom.gof.names = gof_names
)
